import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from warnings import filterwarnings
# Silence every warning raised by the libraries for the rest of the session.
filterwarnings('ignore')

# Absolute locations of the two CSV files on the author's machine.
train_csv = 'C:/Users/purva/Downloads/ML_projects/House Pricing Project/training_set.csv'
test_csv = 'C:/Users/purva/Downloads/ML_projects/House Pricing Project/testing_set.csv'

# df1 holds the training rows, df2 the testing rows.
df1 = pd.read_csv(train_csv)
df2 = pd.read_csv(test_csv)

# Summary statistics (count, mean, std, quartiles, ...) of the training frame.
df1.describe()
| Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1460.000000 | 1460.000000 | 1201.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1452.000000 | 1460.000000 | ... | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 | 1460.000000 |
| mean | 730.500000 | 56.897260 | 70.049958 | 10516.828082 | 6.099315 | 5.575342 | 1971.267808 | 1984.865753 | 103.685262 | 443.639726 | ... | 94.244521 | 46.660274 | 21.954110 | 3.409589 | 15.060959 | 2.758904 | 43.489041 | 6.321918 | 2007.815753 | 180921.195890 |
| std | 421.610009 | 42.300571 | 24.284752 | 9981.264932 | 1.382997 | 1.112799 | 30.202904 | 20.645407 | 181.066207 | 456.098091 | ... | 125.338794 | 66.256028 | 61.119149 | 29.317331 | 55.757415 | 40.177307 | 496.123024 | 2.703626 | 1.328095 | 79442.502883 |
| min | 1.000000 | 20.000000 | 21.000000 | 1300.000000 | 1.000000 | 1.000000 | 1872.000000 | 1950.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 2006.000000 | 34900.000000 |
| 25% | 365.750000 | 20.000000 | 59.000000 | 7553.500000 | 5.000000 | 5.000000 | 1954.000000 | 1967.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 5.000000 | 2007.000000 | 129975.000000 |
| 50% | 730.500000 | 50.000000 | 69.000000 | 9478.500000 | 6.000000 | 5.000000 | 1973.000000 | 1994.000000 | 0.000000 | 383.500000 | ... | 0.000000 | 25.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 6.000000 | 2008.000000 | 163000.000000 |
| 75% | 1095.250000 | 70.000000 | 80.000000 | 11601.500000 | 7.000000 | 6.000000 | 2000.000000 | 2004.000000 | 166.000000 | 712.250000 | ... | 168.000000 | 68.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 8.000000 | 2009.000000 | 214000.000000 |
| max | 1460.000000 | 190.000000 | 313.000000 | 215245.000000 | 10.000000 | 9.000000 | 2010.000000 | 2010.000000 | 1600.000000 | 5644.000000 | ... | 857.000000 | 547.000000 | 552.000000 | 508.000000 | 480.000000 | 738.000000 | 15500.000000 | 12.000000 | 2010.000000 | 755000.000000 |
8 rows × 38 columns
# info is a method: without the call parentheses the cell only echoes the
# bound-method repr (which dumps the whole frame). Calling it prints the
# per-column dtype and non-null count summary instead.
df1.info()
<bound method DataFrame.info of Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \
0 1 60 RL 65.0 8450 Pave NaN Reg
1 2 20 RL 80.0 9600 Pave NaN Reg
2 3 60 RL 68.0 11250 Pave NaN IR1
3 4 70 RL 60.0 9550 Pave NaN IR1
4 5 60 RL 84.0 14260 Pave NaN IR1
... ... ... ... ... ... ... ... ...
1455 1456 60 RL 62.0 7917 Pave NaN Reg
1456 1457 20 RL 85.0 13175 Pave NaN Reg
1457 1458 70 RL 66.0 9042 Pave NaN Reg
1458 1459 20 RL 68.0 9717 Pave NaN Reg
1459 1460 20 RL 75.0 9937 Pave NaN Reg
LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \
0 Lvl AllPub ... 0 NaN NaN NaN 0
1 Lvl AllPub ... 0 NaN NaN NaN 0
2 Lvl AllPub ... 0 NaN NaN NaN 0
3 Lvl AllPub ... 0 NaN NaN NaN 0
4 Lvl AllPub ... 0 NaN NaN NaN 0
... ... ... ... ... ... ... ... ...
1455 Lvl AllPub ... 0 NaN NaN NaN 0
1456 Lvl AllPub ... 0 NaN MnPrv NaN 0
1457 Lvl AllPub ... 0 NaN GdPrv Shed 2500
1458 Lvl AllPub ... 0 NaN NaN NaN 0
1459 Lvl AllPub ... 0 NaN NaN NaN 0
MoSold YrSold SaleType SaleCondition SalePrice
0 2 2008 WD Normal 208500
1 5 2007 WD Normal 181500
2 9 2008 WD Normal 223500
3 2 2006 WD Abnorml 140000
4 12 2008 WD Normal 250000
... ... ... ... ... ...
1455 8 2007 WD Normal 175000
1456 2 2010 WD Normal 210000
1457 5 2010 WD Normal 266500
1458 4 2010 WD Normal 142125
1459 6 2008 WD Normal 147500
[1460 rows x 81 columns]>
# Show every column label of the training frame.
df1.columns
Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC',
'Fence', 'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',
'SaleCondition', 'SalePrice'],
dtype='object')
# (number of rows, number of columns) of the training frame.
df1.shape
(1460, 81)
# Number of missing entries in each column of the training frame.
missing_per_column = df1.isna().sum()

# Wide figure so that all column labels fit along the x-axis of the bar chart.
plt.figure(figsize=(15, 6))
missing_per_column.plot(kind='bar')
<Axes: >
# Fill the gaps in every column of df1 in place:
#   * numeric columns      -> column mean
#   * every other column   -> most frequent value (mode)
# The numeric test uses pandas' own dtype helper rather than `dtype == object`,
# so string/category columns are never sent down the `.mean()` branch.
for i in df1.columns:
    column = df1[i]
    if not column.isna().any():
        # Nothing to fill; leave the column (and its dtype) untouched.
        continue
    if pd.api.types.is_numeric_dtype(column):
        fill_value = column.mean()
    else:
        modes = column.mode()
        if modes.empty:
            # Column is entirely missing, so there is no mode to fill with.
            continue
        fill_value = modes.iloc[0]
    df1[i] = column.fillna(fill_value)

# Re-draw the missing-value bar chart to confirm that the gaps are gone.
plt.figure(figsize=(15, 6))
df1.isna().sum().plot(kind='bar')
<Axes: >
# Split the column names by dtype: object-typed columns are treated as
# categorical, everything else as continuous. Column order is preserved.
is_object_column = df1.dtypes == object
cat = list(df1.columns[is_object_column])
con = list(df1.columns[~is_object_column])

print('categorical features :', cat)
print('-------------------------------')
print('continues features :', con)
categorical features : ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ------------------------------- continues features : ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold', 'SalePrice']
from matplotlib.pyplot import show

# One figure per column, always with SalePrice on the x-axis:
# a box plot for object-typed columns, a scatter plot for all others.
for column in df1.columns:
    draw = sns.boxplot if df1[column].dtypes == object else sns.scatterplot
    draw(data=df1, x='SalePrice', y=column)
    print('SalePrice vs ', column)
    show()
SalePrice vs Id
SalePrice vs MSSubClass
SalePrice vs MSZoning
SalePrice vs LotFrontage
SalePrice vs LotArea
SalePrice vs Street
SalePrice vs Alley
SalePrice vs LotShape
SalePrice vs LandContour
SalePrice vs Utilities
SalePrice vs LotConfig
SalePrice vs LandSlope
SalePrice vs Neighborhood
SalePrice vs Condition1
SalePrice vs Condition2
SalePrice vs BldgType
SalePrice vs HouseStyle
SalePrice vs OverallQual
SalePrice vs OverallCond
SalePrice vs YearBuilt
SalePrice vs YearRemodAdd
SalePrice vs RoofStyle
SalePrice vs RoofMatl
SalePrice vs Exterior1st
SalePrice vs Exterior2nd
SalePrice vs MasVnrType
SalePrice vs MasVnrArea
SalePrice vs ExterQual
SalePrice vs ExterCond
SalePrice vs Foundation
SalePrice vs BsmtQual
SalePrice vs BsmtCond
SalePrice vs BsmtExposure
SalePrice vs BsmtFinType1
SalePrice vs BsmtFinSF1
SalePrice vs BsmtFinType2
SalePrice vs BsmtFinSF2
SalePrice vs BsmtUnfSF
SalePrice vs TotalBsmtSF
SalePrice vs Heating
SalePrice vs HeatingQC
SalePrice vs CentralAir
SalePrice vs Electrical
SalePrice vs 1stFlrSF
SalePrice vs 2ndFlrSF
SalePrice vs LowQualFinSF
SalePrice vs GrLivArea
SalePrice vs BsmtFullBath
SalePrice vs BsmtHalfBath
SalePrice vs FullBath
SalePrice vs HalfBath
SalePrice vs BedroomAbvGr
SalePrice vs KitchenAbvGr
SalePrice vs KitchenQual
SalePrice vs TotRmsAbvGrd
SalePrice vs Functional
SalePrice vs Fireplaces
SalePrice vs FireplaceQu
SalePrice vs GarageType
SalePrice vs GarageYrBlt
SalePrice vs GarageFinish
SalePrice vs GarageCars
SalePrice vs GarageArea
SalePrice vs GarageQual
SalePrice vs GarageCond
SalePrice vs PavedDrive
SalePrice vs WoodDeckSF
SalePrice vs OpenPorchSF
SalePrice vs EnclosedPorch
SalePrice vs 3SsnPorch
SalePrice vs ScreenPorch
SalePrice vs PoolArea
SalePrice vs PoolQC
SalePrice vs Fence
SalePrice vs MiscFeature
SalePrice vs MiscVal
SalePrice vs MoSold
SalePrice vs YrSold
SalePrice vs SaleType
SalePrice vs SaleCondition
SalePrice vs SalePrice
# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(15, 6))
# df1 still contains string-valued columns at this point; numeric_only=True
# restricts the computation to numeric columns explicitly instead of relying
# on a version-dependent default (pandas >= 2.0 raises on string columns).
corr = df1.corr(numeric_only=True)
sns.heatmap(corr, annot=True)
plt.show()
# Show the column labels of the training frame again before building X and Y.
df1.columns
Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF',
'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC',
'Fence', 'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',
'SaleCondition', 'SalePrice'],
dtype='object')
# (rows, columns) of the training frame before the feature/target split.
df1.shape
(1460, 81)
# Target: SalePrice kept as a one-column DataFrame.
Y = df1[['SalePrice']]
# Features: every remaining column of the training frame.
X = df1.drop(columns='SalePrice')

# cat_con lives in the project-local ML_codes module; called on X it reports
# the categorical and continuous feature names (see the cell output).
from ML_codes import cat_con
cat_con(X)
categorical features : ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] continues features : ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Derive the two column groups from the dtypes of X (object -> categorical,
# anything else -> continuous) instead of pasting the printed lists; this
# yields the same names in the same order and stays correct if columns change.
xcat = [name for name in X.columns if X[name].dtypes == object]
xcon = [name for name in X.columns if X[name].dtypes != object]

# Explicit copy: the categorical columns are overwritten below, and writing
# into a slice of X is a chained assignment whose effect is not guaranteed.
Xcat = X[xcat].copy()
Xcon = X[xcon]

le = LabelEncoder()
ss = StandardScaler()

# Standardise the continuous columns. The original index is carried over so
# the index-aligned join below pairs each row with its own categorical values.
# NOTE(review): 'Id' is part of xcon and is therefore scaled and kept as a
# feature - confirm that is intended.
Xcon = pd.DataFrame(ss.fit_transform(Xcon), columns=xcon, index=X.index)

# Replace every categorical column by integer codes; the single encoder is
# refitted per column, so after the loop it only remembers the last column.
# NOTE(review): scaler and encoders are fitted on all rows before any
# train/test split, so held-out rows influence the preprocessing.
for i in Xcat.columns:
    Xcat[i] = le.fit_transform(Xcat[i])

# Final feature matrix: scaled continuous columns followed by the encoded ones.
X = Xcon.join(Xcat)
X
| Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | GarageType | GarageFinish | GarageQual | GarageCond | PavedDrive | PoolQC | Fence | MiscFeature | SaleType | SaleCondition | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -1.730865 | 0.073375 | -0.229372 | -0.207142 | 0.651479 | -0.517200 | 1.050994 | 0.878668 | 0.511418 | 0.575425 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 1 | -1.728492 | -0.872563 | 0.451936 | -0.091886 | -0.071836 | 2.179628 | 0.156734 | -0.429577 | -0.574410 | 1.171992 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 2 | -1.726120 | 0.073375 | -0.093110 | 0.073480 | 0.651479 | -0.517200 | 0.984752 | 0.830215 | 0.323060 | 0.092907 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 3 | -1.723747 | 0.309859 | -0.456474 | -0.096897 | 0.651479 | -0.517200 | -1.863632 | -0.720298 | -0.574410 | -0.499274 | ... | 5 | 2 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 0 |
| 4 | -1.721374 | 0.073375 | 0.633618 | 0.375148 | 1.374795 | -0.517200 | 0.951632 | 0.733308 | 1.364570 | 0.463568 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 1.721374 | 0.073375 | -0.365633 | -0.260560 | -0.071836 | -0.517200 | 0.918511 | 0.733308 | -0.574410 | -0.973018 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 1456 | 1.723747 | -0.872563 | 0.679039 | 0.266407 | -0.071836 | 0.381743 | 0.222975 | 0.151865 | 0.084843 | 0.759659 | ... | 1 | 2 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 1457 | 1.726120 | 0.309859 | -0.183951 | -0.147810 | 0.651479 | 3.078570 | -1.002492 | 1.024029 | -0.574410 | -0.369871 | ... | 1 | 1 | 4 | 4 | 2 | 2 | 0 | 2 | 8 | 4 |
| 1458 | 1.728492 | -0.872563 | -0.093110 | -0.080160 | -0.795151 | 0.381743 | -0.704406 | 0.539493 | -0.574410 | -0.865548 | ... | 1 | 2 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
| 1459 | 1.730865 | -0.872563 | 0.224833 | -0.058112 | -0.795151 | 0.381743 | -0.207594 | -0.962566 | -0.574410 | 0.847389 | ... | 1 | 0 | 4 | 4 | 2 | 2 | 2 | 2 | 8 | 4 |
1460 rows × 80 columns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

lr = LinearRegression()

# Fit and score a linear regression on a fresh random 80/20 split, once per
# categorical column name. Every iteration uses ALL columns of X; `i` is only
# printed as a label. The split has no random_state, so scores differ from
# run to run. xtrain/xtest/ytrain/ytest and model keep the values of the last
# iteration once the loop ends.
for i in Xcat.columns:
    xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2)
    model = lr.fit(xtrain, ytrain)
    tr_pred = model.predict(xtrain)
    ts_pred = model.predict(xtest)
    # r2_score expects (y_true, y_pred); R^2 is not symmetric, so passing the
    # predictions first reports a different (wrong) number.
    tr_score = r2_score(ytrain, tr_pred)
    ts_score = r2_score(ytest, ts_pred)
    print('*******************************************************************************')
    print(i)
    print(tr_score)
    print(ts_score)
    print('*******************************************************************************')
******************************************************************************* MSZoning 0.8245928164811406 -0.0034364018180486067 ******************************************************************************* ******************************************************************************* Street 0.8323826361168949 0.7480507038008104 ******************************************************************************* ******************************************************************************* Alley 0.8331163845143464 -0.0034364256743986132 ******************************************************************************* ******************************************************************************* LotShape 0.8287303174117688 0.7232099685486453 ******************************************************************************* ******************************************************************************* LandContour 0.8388665687788034 0.7547297018945995 ******************************************************************************* ******************************************************************************* Utilities 0.8330065947128444 -0.0034364261253896355 ******************************************************************************* ******************************************************************************* LotConfig 0.8309995238601476 0.7911845587987087 ******************************************************************************* ******************************************************************************* LandSlope 0.8326685848280592 0.7927872597414736 ******************************************************************************* ******************************************************************************* Neighborhood 0.8567026931925429 0.6763744936626719 ******************************************************************************* ******************************************************************************* Condition1 0.8471466244653384 
0.7398085953763389 ******************************************************************************* ******************************************************************************* Condition2 0.8239208952549005 0.856764675437776 ******************************************************************************* ******************************************************************************* BldgType 0.8513357937798982 0.701900019505364 ******************************************************************************* ******************************************************************************* HouseStyle 0.825752321112229 0.7208575651255756 ******************************************************************************* ******************************************************************************* RoofStyle 0.8613188006753336 0.7041882187743602 ******************************************************************************* ******************************************************************************* RoofMatl 0.8398478650527693 0.7920606437092762 ******************************************************************************* ******************************************************************************* Exterior1st 0.8228921585322415 -0.003436425811298438 ******************************************************************************* ******************************************************************************* Exterior2nd 0.8258299029188175 -0.0034364242254396604 ******************************************************************************* ******************************************************************************* MasVnrType 0.8258023173644041 -0.0034364259947012865 ******************************************************************************* ******************************************************************************* ExterQual 0.8216456911996279 0.7989069296463808 ******************************************************************************* 
******************************************************************************* ExterCond 0.8320259102879335 0.7867431215452461 ******************************************************************************* ******************************************************************************* Foundation 0.8081730410516667 0.8738180491262793 ******************************************************************************* ******************************************************************************* BsmtQual 0.8708053823718767 0.6159064925774724 ******************************************************************************* ******************************************************************************* BsmtCond 0.7260000164358311 0.6337607070898481 ******************************************************************************* ******************************************************************************* BsmtExposure 0.8315600084616039 0.8229570211506989 ******************************************************************************* ******************************************************************************* BsmtFinType1 0.8555399599582174 -0.0034364258467534103 ******************************************************************************* ******************************************************************************* BsmtFinType2 0.8526148436627388 0.6908442588139856 ******************************************************************************* ******************************************************************************* Heating 0.827726618472953 -0.0034364260857298046 ******************************************************************************* ******************************************************************************* HeatingQC 0.8384321028654471 0.7817050241808596 ******************************************************************************* ******************************************************************************* CentralAir 0.8623401173554499 
-0.003436426026917072 ******************************************************************************* ******************************************************************************* Electrical 0.7560606535151301 -0.003436426129582504 ******************************************************************************* ******************************************************************************* KitchenQual 0.8041483771591312 0.8330381314464508 ******************************************************************************* ******************************************************************************* Functional 0.8251760056263534 0.8651374435085786 ******************************************************************************* ******************************************************************************* FireplaceQu 0.865181299860468 0.44019070916043224 ******************************************************************************* ******************************************************************************* GarageType 0.8287570707875284 0.7973805809506005 ******************************************************************************* ******************************************************************************* GarageFinish 0.8296862769290205 0.7868122627360405 ******************************************************************************* ******************************************************************************* GarageQual 0.8618434651635145 0.6779404508929997 ******************************************************************************* ******************************************************************************* GarageCond 0.8292790674935437 -0.0034364261283077457 ******************************************************************************* ******************************************************************************* PavedDrive 0.8316027413126958 0.8363955577723898 ******************************************************************************* 
******************************************************************************* PoolQC 0.8672234867360442 0.6725287314006727 ******************************************************************************* ******************************************************************************* Fence 0.8232859369540062 0.8142217391965945 ******************************************************************************* ******************************************************************************* MiscFeature 0.8242903876272583 0.8714713567018831 ******************************************************************************* ******************************************************************************* SaleType 0.8921378732760515 0.6065871712085489 ******************************************************************************* ******************************************************************************* SaleCondition 0.8631777827889595 -0.003436425851724101 *******************************************************************************
from statsmodels.api import add_constant, OLS

# Ordinary least squares on the current training split; add_constant supplies
# the intercept term (the 'const' row of the summary).
design_matrix = add_constant(xtrain)
ols = OLS(ytrain, design_matrix)
model = ols.fit()

# Full regression report: fit statistics plus per-term coefficients and p-values.
model.summary()
| Dep. Variable: | SalePrice | R-squared: | 0.883 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.875 |
| Method: | Least Squares | F-statistic: | 106.8 |
| Date: | Sat, 20 Jul 2024 | Prob (F-statistic): | 0.00 |
| Time: | 17:40:45 | Log-Likelihood: | -13618. |
| No. Observations: | 1168 | AIC: | 2.739e+04 |
| Df Residuals: | 1090 | BIC: | 2.779e+04 |
| Df Model: | 77 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 3.503e+05 | 5.5e+04 | 6.371 | 0.000 | 2.42e+05 | 4.58e+05 |
| Id | 535.3849 | 877.600 | 0.610 | 0.542 | -1186.592 | 2257.362 |
| MSSubClass | -4271.2452 | 1945.208 | -2.196 | 0.028 | -8088.020 | -454.470 |
| LotFrontage | 1082.3021 | 1154.094 | 0.938 | 0.349 | -1182.195 | 3346.799 |
| LotArea | 4443.0872 | 1008.894 | 4.404 | 0.000 | 2463.494 | 6422.681 |
| OverallQual | 1.158e+04 | 1680.831 | 6.889 | 0.000 | 8281.084 | 1.49e+04 |
| OverallCond | 6938.1428 | 1210.576 | 5.731 | 0.000 | 4562.820 | 9313.466 |
| YearBuilt | 7119.7633 | 2581.639 | 2.758 | 0.006 | 2054.218 | 1.22e+04 |
| YearRemodAdd | -74.8490 | 1439.306 | -0.052 | 0.959 | -2898.972 | 2749.274 |
| MasVnrArea | 6211.4388 | 1127.192 | 5.511 | 0.000 | 3999.728 | 8423.150 |
| BsmtFinSF1 | 8623.0758 | 1118.914 | 7.707 | 0.000 | 6427.607 | 1.08e+04 |
| BsmtFinSF2 | 325.7570 | 1303.708 | 0.250 | 0.803 | -2232.304 | 2883.818 |
| BsmtUnfSF | -1127.9579 | 903.495 | -1.248 | 0.212 | -2900.745 | 644.829 |
| TotalBsmtSF | 7948.6429 | 1314.093 | 6.049 | 0.000 | 5370.206 | 1.05e+04 |
| 1stFlrSF | 8813.5941 | 1554.109 | 5.671 | 0.000 | 5764.211 | 1.19e+04 |
| 2ndFlrSF | 1.056e+04 | 1332.369 | 7.928 | 0.000 | 7948.870 | 1.32e+04 |
| LowQualFinSF | -1283.6941 | 950.247 | -1.351 | 0.177 | -3148.213 | 580.825 |
| GrLivArea | 1.514e+04 | 1365.572 | 11.087 | 0.000 | 1.25e+04 | 1.78e+04 |
| BsmtFullBath | 465.3895 | 1335.517 | 0.348 | 0.728 | -2155.086 | 3085.865 |
| BsmtHalfBath | -547.2907 | 959.495 | -0.570 | 0.569 | -2429.957 | 1335.376 |
| FullBath | 1912.1815 | 1533.539 | 1.247 | 0.213 | -1096.840 | 4921.203 |
| HalfBath | 936.5892 | 1315.974 | 0.712 | 0.477 | -1645.540 | 3518.718 |
| BedroomAbvGr | -3913.1913 | 1440.390 | -2.717 | 0.007 | -6739.442 | -1086.940 |
| KitchenAbvGr | -4675.1851 | 1174.369 | -3.981 | 0.000 | -6979.464 | -2370.906 |
| TotRmsAbvGrd | 4725.4328 | 1999.426 | 2.363 | 0.018 | 802.273 | 8648.593 |
| Fireplaces | 3732.1940 | 1166.372 | 3.200 | 0.001 | 1443.606 | 6020.782 |
| GarageYrBlt | 1284.7988 | 1650.073 | 0.779 | 0.436 | -1952.879 | 4522.477 |
| GarageCars | 3016.0192 | 2106.055 | 1.432 | 0.152 | -1116.362 | 7148.401 |
| GarageArea | 2280.3784 | 2055.193 | 1.110 | 0.267 | -1752.203 | 6312.960 |
| WoodDeckSF | 2215.5906 | 954.388 | 2.321 | 0.020 | 342.945 | 4088.237 |
| OpenPorchSF | -611.1213 | 990.312 | -0.617 | 0.537 | -2554.254 | 1332.012 |
| EnclosedPorch | -417.5214 | 980.060 | -0.426 | 0.670 | -2340.540 | 1505.497 |
| 3SsnPorch | 845.1519 | 955.280 | 0.885 | 0.377 | -1029.244 | 2719.548 |
| ScreenPorch | 2100.9254 | 894.602 | 2.348 | 0.019 | 345.589 | 3856.262 |
| PoolArea | 1232.3222 | 1219.260 | 1.011 | 0.312 | -1160.041 | 3624.685 |
| MiscVal | -2833.7893 | 1438.043 | -1.971 | 0.049 | -5655.434 | -12.144 |
| MoSold | -922.2999 | 882.367 | -1.045 | 0.296 | -2653.629 | 809.029 |
| YrSold | -408.6439 | 886.091 | -0.461 | 0.645 | -2147.281 | 1329.993 |
| MSZoning | -926.7597 | 1650.040 | -0.562 | 0.574 | -4164.375 | 2310.855 |
| Street | 2.14e+04 | 1.4e+04 | 1.529 | 0.127 | -6062.804 | 4.89e+04 |
| Alley | -4495.0258 | 6053.999 | -0.742 | 0.458 | -1.64e+04 | 7383.784 |
| LotShape | -669.1901 | 683.982 | -0.978 | 0.328 | -2011.260 | 672.880 |
| LandContour | 2752.8194 | 1389.489 | 1.981 | 0.048 | 26.443 | 5479.196 |
| Utilities | -4.012e-11 | 1.69e-11 | -2.377 | 0.018 | -7.32e-11 | -7e-12 |
| LotConfig | -205.9756 | 559.871 | -0.368 | 0.713 | -1304.522 | 892.571 |
| LandSlope | 3216.0627 | 3931.571 | 0.818 | 0.414 | -4498.241 | 1.09e+04 |
| Neighborhood | 136.4865 | 159.160 | 0.858 | 0.391 | -175.809 | 448.782 |
| Condition1 | -1833.1329 | 1012.618 | -1.810 | 0.071 | -3820.034 | 153.768 |
| Condition2 | -1.734e+04 | 3660.277 | -4.737 | 0.000 | -2.45e+04 | -1.02e+04 |
| BldgType | -367.8936 | 1534.811 | -0.240 | 0.811 | -3379.412 | 2643.625 |
| HouseStyle | -604.2676 | 674.021 | -0.897 | 0.370 | -1926.794 | 718.259 |
| RoofStyle | 2248.4983 | 1171.016 | 1.920 | 0.055 | -49.203 | 4546.199 |
| RoofMatl | 2895.9521 | 1451.630 | 1.995 | 0.046 | 47.647 | 5744.257 |
| Exterior1st | -1203.5881 | 531.296 | -2.265 | 0.024 | -2246.067 | -161.109 |
| Exterior2nd | 742.0937 | 485.490 | 1.529 | 0.127 | -210.506 | 1694.694 |
| MasVnrType | 6510.7830 | 1624.206 | 4.009 | 0.000 | 3323.859 | 9697.707 |
| ExterQual | -8946.7256 | 1983.295 | -4.511 | 0.000 | -1.28e+04 | -5055.218 |
| ExterCond | 1264.1551 | 1318.998 | 0.958 | 0.338 | -1323.907 | 3852.218 |
| Foundation | 1690.4702 | 1792.403 | 0.943 | 0.346 | -1826.480 | 5207.420 |
| BsmtQual | -7301.9380 | 1475.000 | -4.950 | 0.000 | -1.02e+04 | -4407.778 |
| BsmtCond | 2218.7610 | 1373.472 | 1.615 | 0.107 | -476.186 | 4913.708 |
| BsmtExposure | -2704.5450 | 922.616 | -2.931 | 0.003 | -4514.848 | -894.242 |
| BsmtFinType1 | 841.3309 | 665.720 | 1.264 | 0.207 | -464.907 | 2147.569 |
| BsmtFinType2 | -460.8678 | 1458.224 | -0.316 | 0.752 | -3322.112 | 2400.376 |
| Heating | -1772.7638 | 3079.643 | -0.576 | 0.565 | -7815.463 | 4269.935 |
| HeatingQC | -450.6785 | 635.021 | -0.710 | 0.478 | -1696.680 | 795.323 |
| CentralAir | -4637.2940 | 4672.236 | -0.993 | 0.321 | -1.38e+04 | 4530.300 |
| Electrical | -635.1387 | 933.625 | -0.680 | 0.496 | -2467.045 | 1196.767 |
| KitchenQual | -8770.0820 | 1476.135 | -5.941 | 0.000 | -1.17e+04 | -5873.694 |
| Functional | 4341.3901 | 966.594 | 4.491 | 0.000 | 2444.794 | 6237.986 |
| FireplaceQu | -1249.1164 | 1151.685 | -1.085 | 0.278 | -3508.886 | 1010.653 |
| GarageType | 698.9171 | 648.012 | 1.079 | 0.281 | -572.575 | 1970.410 |
| GarageFinish | 565.7042 | 1506.910 | 0.375 | 0.707 | -2391.068 | 3522.476 |
| GarageQual | -1079.9054 | 1795.605 | -0.601 | 0.548 | -4603.138 | 2443.327 |
| GarageCond | 2516.6083 | 2064.993 | 1.219 | 0.223 | -1535.202 | 6568.418 |
| PavedDrive | 195.0864 | 2088.697 | 0.093 | 0.926 | -3903.236 | 4293.408 |
| PoolQC | -3.596e+04 | 1.45e+04 | -2.476 | 0.013 | -6.45e+04 | -7462.718 |
| Fence | 2049.6855 | 2087.469 | 0.982 | 0.326 | -2046.226 | 6145.597 |
| MiscFeature | -3.973e+04 | 1.79e+04 | -2.217 | 0.027 | -7.49e+04 | -4561.544 |
| SaleType | -85.9780 | 599.855 | -0.143 | 0.886 | -1262.979 | 1091.023 |
| SaleCondition | 2954.5572 | 860.978 | 3.432 | 0.001 | 1265.195 | 4643.920 |
| Omnibus: | 520.300 | Durbin-Watson: | 2.009 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 55517.418 |
| Skew: | -1.068 | Prob(JB): | 0.00 |
| Kurtosis: | 36.708 | Cond. No. | 1.01e+16 |
# Rank the terms of the fitted OLS model by p-value, largest first.
model.pvalues.sort_values(ascending=False)
YearRemodAdd 9.585354e-01
PavedDrive 9.256022e-01
SaleType 8.860549e-01
BldgType 8.106082e-01
BsmtFinSF2 8.027353e-01
...
const 2.773057e-10
OverallQual 9.485603e-12
BsmtFinSF1 2.895225e-14
2ndFlrSF 5.475919e-15
GrLivArea 3.843453e-27
Length: 81, dtype: float64
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
Fence 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
Foundation 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
FireplaceQu 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
GarageType 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
Condition2 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
CentralAir 0.859
# Backward-elimination step: remove the predictor with the largest p-value in
# the current OLS fit, then refit on the reduced feature set.
# The intercept term ('const', introduced by add_constant) is left out of the
# ranking: it is not a column of X, so picking it would make X.drop raise KeyError.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
X = X.drop(labels=c, axis=1)
# Fixed random_state keeps the train/test split reproducible between steps.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=21)
ols = OLS(ytrain, add_constant(xtrain))
model = ols.fit()
score = round(model.rsquared_adj, 3)
# Report the next elimination candidate under the refitted model, together
# with the adjusted R^2 of that refit.
c = model.pvalues.drop(labels='const', errors='ignore').sort_values().index[-1]
print(c)
print(score)
GarageYrBlt 0.859
# Display the (rows, columns) dimensions of the current feature matrix X.
X.shape
(1460, 73)
from sklearn.linear_model import Ridge

# Fit a Ridge regressor (alpha=25) on the training split and print the
# R^2 score for the training and the test split, one per line.
rr = Ridge(alpha=25)
model = rr.fit(xtrain, ytrain)
tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)
tr_score, ts_score = r2_score(ytrain, tr_pred), r2_score(ytest, ts_pred)
print(tr_score, ts_score, sep='\n')
0.8601660549303662 0.8035245219621637
from sklearn.linear_model import Lasso

# Fit a Lasso regressor (alpha=0.5) on the training split and print the
# R^2 score for the training and the test split, one per line.
l1 = Lasso(alpha=0.5)
model = l1.fit(xtrain, ytrain)
tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)
tr_score, ts_score = r2_score(ytrain, tr_pred), r2_score(ytest, ts_pred)
print(tr_score, ts_score, sep='\n')
0.867998361160488 0.799011737601639
# Candidate grid 0.01, 0.02, ..., 10.00 (1000 values, each rounded to 4 decimals).
w = [round(step / 100, 4) for step in range(1, 1001)]
# The running value ends one step past the last grid entry.
e = round(w[-1] + 0.01, 4)
from sklearn.metrics import mean_squared_error

# Refit Ridge with alpha=10, then report R^2 and mean squared error for the
# training and test splits.
rr = Ridge(alpha=10)
model = rr.fit(xtrain, ytrain)
tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)
tr_err, ts_err = mean_squared_error(ytrain, tr_pred), mean_squared_error(ytest, ts_pred)
tr_score, ts_score = r2_score(ytrain, tr_pred), r2_score(ytest, ts_pred)
print('traning score:', tr_score)
print('testing score:', ts_score)
print(tr_err, ts_err, sep='\n')
traning score: 0.8628308466052411 testing score: 0.8019561153896838 848929674.1909673 1342205192.3466187
# MSE: mean squared error of the current predictions on each split.
tr_err, ts_err = mean_squared_error(ytrain, tr_pred), mean_squared_error(ytest, ts_pred)
print(tr_err, ts_err, sep='\n')
848929674.1909673 1342205192.3466187
# RMSE: square root of the error values currently held in tr_err / ts_err.
import numpy as np

training_rmse, testing_rmse = np.sqrt(tr_err), np.sqrt(ts_err)
print(training_rmse, testing_rmse, sep='\n')
29136.39775591635 36636.118685617046
# MAE: mean absolute error of the current predictions on each split.
from sklearn.metrics import mean_absolute_error

# tr_err / ts_err are rebound here and hold MAE values from this point on.
tr_err, ts_err = mean_absolute_error(ytrain, tr_pred), mean_absolute_error(ytest, ts_pred)
print(tr_err, ts_err, sep='\n')
18304.01771012517 20162.481947226697
# R2 score: coefficient of determination of the current predictions per split.
tr_score, ts_score = r2_score(ytrain, tr_pred), r2_score(ytest, ts_pred)
print('traning score:', tr_score)
print('testing score:', ts_score)
traning score: 0.8628308466052411 testing score: 0.8019561153896838
# R2_adj: adjusted R^2 on the training split,
# 1 - (1 - R^2) * (n - 1) / (n - p - 1), with n rows and p feature columns.
n, p = len(ytrain), len(xtrain.columns)
r2_adj = 1 - (1 - tr_score) * (n - 1) / (n - p - 1)
print(r2_adj)
0.8536778775030314
# Same adjusted-R^2 formula, evaluated on the test split:
# 1 - (1 - R^2) * (n - 1) / (n - p - 1), with n rows and p feature columns.
n, p = len(ytest), len(xtest.columns)
r2_adj = 1 - (1 - ts_score) * (n - 1) / (n - p - 1)
print(r2_adj)
0.7356386677908164
# Preview the first rows of df2 (the frame loaded from testing_set.csv).
df2.head()
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | ScreenPorch | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1461 | 20 | RH | 80.0 | 11622 | Pave | NaN | Reg | Lvl | AllPub | ... | 120 | 0 | NaN | MnPrv | NaN | 0 | 6 | 2010 | WD | Normal |
| 1 | 1462 | 20 | RL | 81.0 | 14267 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | Gar2 | 12500 | 6 | 2010 | WD | Normal |
| 2 | 1463 | 60 | RL | 74.0 | 13830 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | MnPrv | NaN | 0 | 3 | 2010 | WD | Normal |
| 3 | 1464 | 60 | RL | 78.0 | 9978 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 6 | 2010 | WD | Normal |
| 4 | 1465 | 120 | RL | 43.0 | 5005 | Pave | NaN | IR1 | HLS | AllPub | ... | 144 | 0 | NaN | NaN | NaN | 0 | 1 | 2010 | WD | Normal |
5 rows × 80 columns
from ML_codes import null_treat

# Project helper; its return value is discarded, so it presumably fills the
# missing values of df2 in place -- TODO confirm against ML_codes.
null_treat(df2)

# Partition the columns by dtype: object columns are treated as categorical,
# everything else as continuous.
cat = [col for col in df2.columns if df2[col].dtypes == object]
con = [col for col in df2.columns if not (df2[col].dtypes == object)]

print('categorical features :', cat)
print('-------------------------------')
print('continues features :', con)
categorical features : ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ------------------------------- continues features : ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']
# Build the model input for the test frame (scale continuous columns, encode
# categorical columns, keep only the columns the model was trained on) and
# predict with the current `model`.

# Column lists pinned as literals (object-dtype vs. other columns of df2).
cat = ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
con = ['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']

# Explicit copy: the loop below assigns into Xtcat column by column, which
# must not be a chained assignment on a slice of df2.
Xtcat = df2[cat].copy()
Xtcon = df2[con]

# NOTE(review): fit_transform re-fits `ss` (presumably a StandardScaler) on the
# test rows, so the scaling differs from whatever was fitted on the training
# data. Consider ss.transform once the training-side fit is confirmed.
# The source index is carried over so the join below aligns row-for-row even
# when df2 does not have a default RangeIndex.
Xtcon = pd.DataFrame(ss.fit_transform(Xtcon), columns=con, index=Xtcon.index)

for i in Xtcat.columns:
    # NOTE(review): `le` (presumably a LabelEncoder) is re-fitted per column on
    # the test values, so the integer codes are not guaranteed to match the
    # training encoding -- verify.
    Xtcat[i] = le.fit_transform(Xtcat[i])

Xt = Xtcon.join(Xtcat)

# Manual removal of the eliminated features. `xt` is not read again in this
# cell; the column selection below is what actually feeds the model.
xt = Xt.drop(labels=['Condition2', 'Fence', 'FireplaceQu', 'Foundation', 'GarageType', 'YearRemodAdd', 'CentralAir'], axis=1)

# Select exactly the training columns, in training order.
trained_features = xtrain.columns.tolist()
Xt = Xt[trained_features]

# Now you can make predictions
prob = model.predict(Xt)
print(prob[:5])
[[182978.1657155 ] [226412.13985591] [252956.70407142] [260503.68327767] [259828.84682967]]
# Flag each prediction as 1 when it is >= 0.5, otherwise 0.
# NOTE(review): `prob` holds predicted prices here, so a 0.5 cut-off looks like
# a leftover from a classification workflow -- confirm it is still wanted.
preds = [1 if i >= 0.5 else 0 for i in prob]
preds[:5]
[1, 1, 1, 1, 1]
# Assemble the output frame: one row per Id with the model prediction and the
# thresholded flag.
# Explicit copy so the column assignments below write to an independent frame
# instead of a slice of df2 (avoids chained-assignment on a view).
df_result = df2[['Id']].copy()
# NOTE(review): the training target column is named 'SalePrice'; confirm
# whether 'SalesPrice' is the intended output column name.
df_result['SalesPrice'] = prob
df_result['probability'] = preds
df_result.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1459 entries, 0 to 1458 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1459 non-null int64 1 SalesPrice 1459 non-null float64 2 probability 1459 non-null int64 dtypes: float64(1), int64(2) memory usage: 34.3 KB
# Preview the first rows of the assembled result frame.
df_result.head()
| Id | SalesPrice | probability | |
|---|---|---|---|
| 0 | 1461 | 182978.165716 | 1 |
| 1 | 1462 | 226412.139856 | 1 |
| 2 | 1463 | 252956.704071 | 1 |
| 3 | 1464 | 260503.683278 | 1 |
| 4 | 1465 | 259828.846830 | 1 |